In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import mpl_toolkits as mplot3d
In [2]:
# DF1 = Total and facet scores, and score per question
df1 = pd.read_excel('Survey Data.xlsx', sheet_name = 'Risk Facet scores')
df1
Out[2]:
| responseid | q8_2_1 | q8_2_2 | q8_2_3 | q8_2_4 | q8_2_5 | q8_3 | q8_4 | q8_5berlin_1 | q8_5london_1 | ... | Number Comprehension score_5 | Graph Comprehension score_3 | Calculation score_4 | Total Score_19 | Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | Total Score_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 6873 | 1 | 1 | 1 | 1 | 2 | 2 | 4 | 1 | 1 | ... | 2 | 1 | 1 | 6 | 20 | 50 | 40 | 33.333333 | 25 | 31.578947 |
| 1 | 14129 | 2 | 2 | 2 | 1 | 2 | 4 | 3 | 1 | 1 | ... | 3 | 1 | 2 | 10 | 80 | 0 | 60 | 33.333333 | 50 | 52.631579 |
| 2 | 5861 | 1 | 1 | 1 | 2 | 2 | 4 | 4 | 2 | 1 | ... | 3 | 1 | 1 | 8 | 40 | 50 | 60 | 33.333333 | 25 | 42.105263 |
| 3 | 13364 | 2 | 1 | 2 | 1 | 1 | 4 | 1 | 2 | 1 | ... | 3 | 0 | 0 | 5 | 40 | 0 | 60 | 0.000000 | 0 | 26.315789 |
| 4 | 14112 | 2 | 2 | 2 | 2 | 2 | 2 | 3 | 2 | 1 | ... | 3 | 2 | 3 | 13 | 100 | 0 | 60 | 66.666667 | 75 | 68.421053 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 647 | 14135 | 1 | 1 | 2 | 1 | 2 | 4 | 3 | 2 | 1 | ... | 5 | 3 | 3 | 13 | 40 | 0 | 100 | 100.000000 | 75 | 68.421053 |
| 648 | 14155 | 2 | 2 | 2 | 2 | 2 | 4 | 4 | 2 | 1 | ... | 4 | 1 | 3 | 14 | 100 | 50 | 80 | 33.333333 | 75 | 73.684211 |
| 649 | 14274 | 1 | 1 | 1 | 2 | 2 | 3 | 4 | 2 | 1 | ... | 4 | 3 | 3 | 13 | 40 | 50 | 80 | 100.000000 | 75 | 68.421053 |
| 650 | 14448 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | ... | 2 | 1 | 0 | 8 | 100 | 0 | 40 | 33.333333 | 0 | 42.105263 |
| 651 | 15717 | 1 | 2 | 1 | 1 | 2 | 4 | 4 | 2 | 1 | ... | 3 | 3 | 3 | 12 | 40 | 50 | 60 | 100.000000 | 75 | 63.157895 |
652 rows × 32 columns
In [3]:
# DF2 = Facet scores and total score, with Income, wealth and ISCED
df2 = pd.read_excel('Survey Data.xlsx', sheet_name = 'MAIN')
df2
Out[3]:
| responseid | Q12. Income | Q14. Wealth | isced | age | age_rec | Certainty score_5 | Uncertainty score_2 | Number Comprehension score_5 | Graph Comprehension score_3 | Calculation score_4 | Total Score_19 | Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | Total Score_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 6873 | 6 | NaN | 1 | 25 | 2 | 1 | 1 | 2 | 1 | 1 | 6 | 20 | 50 | 40 | 33.333333 | 25 | 31.578947 |
| 1 | 14129 | 1 | 3000.0 | 2 | 30 | 2 | 4 | 0 | 3 | 1 | 2 | 10 | 80 | 0 | 60 | 33.333333 | 50 | 52.631579 |
| 2 | 5861 | 2 | 0.0 | 1 | 43 | 3 | 2 | 1 | 3 | 1 | 1 | 8 | 40 | 50 | 60 | 33.333333 | 25 | 42.105263 |
| 3 | 13364 | 5 | 105000.0 | 1 | 32 | 2 | 2 | 0 | 3 | 0 | 0 | 5 | 40 | 0 | 60 | 0.000000 | 0 | 26.315789 |
| 4 | 14112 | 3 | 34865.0 | 2 | 32 | 2 | 5 | 0 | 3 | 2 | 3 | 13 | 100 | 0 | 60 | 66.666667 | 75 | 68.421053 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 647 | 14135 | 1 | 500.0 | 2 | 33 | 2 | 2 | 0 | 5 | 3 | 3 | 13 | 40 | 0 | 100 | 100.000000 | 75 | 68.421053 |
| 648 | 14155 | 6 | NaN | 2 | 29 | 2 | 5 | 1 | 4 | 1 | 3 | 14 | 100 | 50 | 80 | 33.333333 | 75 | 73.684211 |
| 649 | 14274 | 2 | NaN | 2 | 74 | 4 | 2 | 1 | 4 | 3 | 3 | 13 | 40 | 50 | 80 | 100.000000 | 75 | 68.421053 |
| 650 | 14448 | 3 | NaN | 2 | 31 | 2 | 5 | 0 | 2 | 1 | 0 | 8 | 100 | 0 | 40 | 33.333333 | 0 | 42.105263 |
| 651 | 15717 | 1 | NaN | 2 | 26 | 2 | 2 | 1 | 3 | 3 | 3 | 12 | 40 | 50 | 60 | 100.000000 | 75 | 63.157895 |
652 rows × 18 columns
In [4]:
# The following graph represents frequency of each data point on "Total Score_19" or the total score out of 19 questions across the sample.
df2['Total Score_19'].plot(kind = 'hist', xticks = np.arange(0, 20, step=1), xlabel = 'Total Score_19', title = 'Frequency of Scores' )
Out[4]:
<Axes: title={'center': 'Frequency of Scores'}, xlabel='Total Score_19', ylabel='Frequency'>
In [5]:
# Absolute mean scores for each facet
(df2[['Certainty score_5','Uncertainty score_2','Number Comprehension score_5','Graph Comprehension score_3','Calculation score_4','Total Score_19']].mean(axis = 0)).plot(kind = 'bar', title = 'Absolute mean of scores for above data set')
Out[5]:
<Axes: title={'center': 'Absolute mean of scores for above data set'}>
In [6]:
# Normalised mean scores for each facet
df2[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%','Total Score_%']].mean(axis = 0).plot(kind = 'bar', title = 'Normalised mean of scores for above data set')
Out[6]:
<Axes: title={'center': 'Normalised mean of scores for above data set'}>
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [7]:
# DF3 = Sorted by income
df3 = df2.sort_values(by = 'Q12. Income')
df3 = df3.reset_index(drop = True)
df3.drop(df3[df3['Q12. Income'] == 7].index, inplace = True)
df3
Out[7]:
| responseid | Q12. Income | Q14. Wealth | isced | age | age_rec | Certainty score_5 | Uncertainty score_2 | Number Comprehension score_5 | Graph Comprehension score_3 | Calculation score_4 | Total Score_19 | Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | Total Score_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 15717 | 1 | NaN | 2 | 26 | 2 | 2 | 1 | 3 | 3 | 3 | 12 | 40 | 50 | 60 | 100.000000 | 75 | 63.157895 |
| 1 | 219 | 1 | 80000.0 | 2 | 47 | 3 | 5 | 1 | 4 | 2 | 3 | 15 | 100 | 50 | 80 | 66.666667 | 75 | 78.947368 |
| 2 | 845 | 1 | NaN | 3 | 50 | 3 | 1 | 0 | 2 | 1 | 1 | 5 | 20 | 0 | 40 | 33.333333 | 25 | 26.315789 |
| 3 | 8599 | 1 | 2500.0 | 3 | 42 | 3 | 3 | 1 | 3 | 3 | 1 | 11 | 60 | 50 | 60 | 100.000000 | 25 | 57.894737 |
| 4 | 12715 | 1 | 200.0 | 2 | 31 | 2 | 3 | 0 | 2 | 0 | 2 | 7 | 60 | 0 | 40 | 0.000000 | 50 | 36.842105 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 572 | 7893 | 6 | 1000.0 | 1 | 41 | 3 | 1 | 1 | 3 | 1 | 4 | 10 | 20 | 50 | 60 | 33.333333 | 100 | 52.631579 |
| 573 | 6296 | 6 | NaN | 1 | 41 | 3 | 5 | 1 | 4 | 1 | 3 | 14 | 100 | 50 | 80 | 33.333333 | 75 | 73.684211 |
| 574 | 5806 | 6 | NaN | 1 | 40 | 3 | 2 | 1 | 3 | 2 | 3 | 11 | 40 | 50 | 60 | 66.666667 | 75 | 57.894737 |
| 575 | 1086 | 6 | NaN | 1 | 43 | 3 | 3 | 1 | 3 | 3 | 1 | 11 | 60 | 50 | 60 | 100.000000 | 25 | 57.894737 |
| 576 | 679 | 6 | NaN | 3 | 40 | 3 | 2 | 0 | 1 | 1 | 1 | 5 | 40 | 0 | 20 | 33.333333 | 25 | 26.315789 |
577 rows × 18 columns
In [8]:
# Absolute mean score for each facet (Sort by Income)
df3[['Certainty score_5','Uncertainty score_2','Number Comprehension score_5','Graph Comprehension score_3','Calculation score_4','Total Score_19']].mean(axis = 0).plot(kind = 'bar', title = 'Absolute mean score for each facet (Sort by Income)')
Out[8]:
<Axes: title={'center': 'Absolute mean score for each facet (Sort by Income)'}>
In [9]:
# Normalised mean score for each facet (Sort by Income)
(df3[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%','Total Score_%']].mean(axis = 0)).plot(kind = 'bar', title = 'Normalised mean score for each facet (Sort by Income)')
Out[9]:
<Axes: title={'center': 'Normalised mean score for each facet (Sort by Income)'}>
In [10]:
# Normalised mean of each facet as a numerical (sorted by income)
df3.groupby('Q12. Income')[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%']].mean()
Out[10]:
| Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | |
|---|---|---|---|---|---|
| Q12. Income | |||||
| 1 | 48.888889 | 38.333333 | 58.000000 | 60.740741 | 45.555556 |
| 2 | 57.217391 | 41.739130 | 63.304348 | 61.739130 | 50.869565 |
| 3 | 58.181818 | 42.045455 | 58.787879 | 63.131313 | 50.189394 |
| 4 | 62.886598 | 46.391753 | 63.711340 | 62.886598 | 55.154639 |
| 5 | 52.876712 | 36.301370 | 60.547945 | 57.990868 | 50.000000 |
| 6 | 64.857143 | 48.571429 | 69.142857 | 67.619048 | 59.642857 |
In [11]:
# Count of responses for each category (sorted by income)
df3.groupby('Q12. Income')[['responseid']].count()
Out[11]:
| responseid | |
|---|---|
| Q12. Income | |
| 1 | 90 |
| 2 | 115 |
| 3 | 132 |
| 4 | 97 |
| 5 | 73 |
| 6 | 70 |
In [12]:
# Absolute mean of Total Facet score line plot (sorted by income)
df3.groupby('Q12. Income')[['Total Score_19']].mean().plot( kind = 'line', title = 'Absolute mean of Total Facet score line plot (sorted by income)').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[12]:
<matplotlib.legend.Legend at 0x235196c3e10>
In [13]:
# Absolute mean of each Facet score line plot (sorted by income)
df3.groupby('Q12. Income')[['Certainty score_5','Uncertainty score_2','Number Comprehension score_5','Graph Comprehension score_3','Calculation score_4']].mean().plot( kind = 'line', title = 'Absolute Facet scores mean line plot').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[13]:
<matplotlib.legend.Legend at 0x23519843690>
In [14]:
# Normalised mean of each Facet score line plot (sorted by income)
df3.groupby('Q12. Income')[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%','Total Score_%']].mean().plot( kind = 'line', title = 'Normalised mean of each Facet score line plot (sorted by income)').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[14]:
<matplotlib.legend.Legend at 0x23519878b50>
In [15]:
# Normalised mean of each Facet score stacked plot (sorted by income)
df3.groupby('Q12. Income')[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%']].mean().plot( kind = 'bar', title = 'Normalised mean of each Facet score stacked plot (sorted by income)', stacked = True).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[15]:
<matplotlib.legend.Legend at 0x235198f3e10>
In [16]:
# Normalised mean of each Facet score hist plot (sorted by income)
df3.groupby('Q12. Income')[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%','Total Score_%']].mean().T.plot(kind = 'bar', title = 'Normalised mean scores for each facet for each wealth response category').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[16]:
<matplotlib.legend.Legend at 0x2351a1566d0>
In [17]:
# Trend line for Absolute Total Facet Score vs Q12. Income
sns.regplot (data = df3, x = 'Q12. Income', y = 'Total Score_19')
Out[17]:
<Axes: xlabel='Q12. Income', ylabel='Total Score_19'>
In [18]:
# Trend line for Absolute Independent Facet Score vs Q12. Income reponses
fig, ax = plt.subplots(figsize=(6, 6))
sns.regplot (data = df3, x = 'Q12. Income', y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax, label='Certainty score_5')
sns.regplot (data = df3, x = 'Q12. Income', y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax, label='Uncertainty score_2')
sns.regplot (data = df3, x = 'Q12. Income', y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax, label='Number Comprehension score_5')
sns.regplot (data = df3, x = 'Q12. Income', y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax, label='Graph Comprehension score_3')
sns.regplot (data = df3, x = 'Q12. Income', y = 'Calculation score_4',fit_reg=True, ci=None, ax=ax, label='Calculation score_4' )
ax.set(ylabel='Scores', xlabel='Q12. Income')
ax.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [19]:
# Violine Plot for Total Score_19 for each entry under Wealth.
sns.violinplot( data = df3, x = 'Q12. Income', y = 'Total Score_19')
Out[19]:
<Axes: xlabel='Q12. Income', ylabel='Total Score_19'>
In [20]:
# Violine Plot for Total Score_19 for each entry under Wealth.
sns.violinplot( data = df3, x = 'Q12. Income', y = 'Calculation score_4')
Out[20]:
<Axes: xlabel='Q12. Income', ylabel='Calculation score_4'>
In [21]:
# Violine Plot for Graph Comprehension score_3 for each entry under Wealth.
sns.violinplot( data = df3, x = 'Q12. Income', y = 'Graph Comprehension score_3')
Out[21]:
<Axes: xlabel='Q12. Income', ylabel='Graph Comprehension score_3'>
In [22]:
# Violine Plot for Number Comprehension score_5 for each entry under Wealth.
sns.violinplot( data = df3, x = 'Q12. Income', y = 'Number Comprehension score_5')
Out[22]:
<Axes: xlabel='Q12. Income', ylabel='Number Comprehension score_5'>
In [23]:
# Violine Plot for Uncertainty score_2 for each entry under Wealth.
sns.violinplot( data = df3, x = 'Q12. Income', y = 'Uncertainty score_2')
Out[23]:
<Axes: xlabel='Q12. Income', ylabel='Uncertainty score_2'>
In [24]:
# Violine Plot for Certainty score_5 for each entry under Wealth.
sns.violinplot( data = df3, x = 'Q12. Income', y = 'Certainty score_5')
Out[24]:
<Axes: xlabel='Q12. Income', ylabel='Certainty score_5'>
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [25]:
# Data Frame 2 or df2 is an aggregate data on facet total scores and over all total score, along with wealth and income data vs each response.
# This data set is also sorted by wealth with all responses 'NA' removed.
# We also assign quartiles ranking each response according to this sort.
df4 = df2.sort_values(by = 'Q14. Wealth')
df4 = df4.reset_index(drop = True)
df4 = df4.dropna(axis = 0, subset = 'Q14. Wealth')
df4.insert(loc = len(df4.columns), column = "Quartile Number", value = pd.qcut(df4["Q14. Wealth"],q = 4, labels = False ) + 1, allow_duplicates = 'False')
df4
Out[25]:
| responseid | Q12. Income | Q14. Wealth | isced | age | age_rec | Certainty score_5 | Uncertainty score_2 | Number Comprehension score_5 | Graph Comprehension score_3 | Calculation score_4 | Total Score_19 | Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | Total Score_% | Quartile Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2305 | 4 | 0.0 | 1 | 29 | 2 | 1 | 0 | 3 | 2 | 2 | 8 | 20 | 0 | 60 | 66.666667 | 50 | 42.105263 | 1 |
| 1 | 5760 | 3 | 0.0 | 2 | 57 | 3 | 1 | 0 | 3 | 3 | 1 | 8 | 20 | 0 | 60 | 100.000000 | 25 | 42.105263 | 1 |
| 2 | 5463 | 6 | 0.0 | 1 | 30 | 2 | 2 | 1 | 4 | 3 | 2 | 12 | 40 | 50 | 80 | 100.000000 | 50 | 63.157895 | 1 |
| 3 | 796 | 2 | 0.0 | 1 | 35 | 2 | 5 | 2 | 4 | 2 | 3 | 16 | 100 | 100 | 80 | 66.666667 | 75 | 84.210526 | 1 |
| 4 | 2578 | 3 | 0.0 | 3 | 33 | 2 | 1 | 0 | 2 | 2 | 0 | 5 | 20 | 0 | 40 | 66.666667 | 0 | 26.315789 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 375 | 15 | 6 | 1250000.0 | 2 | 68 | 4 | 2 | 1 | 4 | 3 | 3 | 13 | 40 | 50 | 80 | 100.000000 | 75 | 68.421053 | 4 |
| 376 | 970 | 6 | 1350000.0 | 3 | 64 | 4 | 2 | 1 | 4 | 3 | 4 | 14 | 40 | 50 | 80 | 100.000000 | 100 | 73.684211 | 4 |
| 377 | 5592 | 7 | 1500000.0 | 2 | 64 | 4 | 3 | 1 | 5 | 3 | 3 | 15 | 60 | 50 | 100 | 100.000000 | 75 | 78.947368 | 4 |
| 378 | 10809 | 6 | 1650000.0 | 1 | 59 | 3 | 3 | 2 | 5 | 3 | 2 | 15 | 60 | 100 | 100 | 100.000000 | 50 | 78.947368 | 4 |
| 379 | 278 | 6 | 3500000.0 | 1 | 66 | 4 | 5 | 2 | 3 | 3 | 4 | 17 | 100 | 100 | 60 | 100.000000 | 100 | 89.473684 | 4 |
380 rows × 19 columns
In [26]:
# Absolute mean score for each facet (Sort by Wealth)
df4[['Certainty score_5','Uncertainty score_2','Number Comprehension score_5','Graph Comprehension score_3','Calculation score_4','Total Score_19']].mean(axis = 0).plot(kind = 'bar', title = 'Absolute mean score for each facet (Sort by Wealth)')
Out[26]:
<Axes: title={'center': 'Absolute mean score for each facet (Sort by Wealth)'}>
In [27]:
# Normalised mean score for each facet (Sort by Wealth)
(df4[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%','Total Score_%']].mean(axis = 0)).plot(kind = 'bar', title = 'Normalised mean score for each facet (Sort by Wealth)')
Out[27]:
<Axes: title={'center': 'Normalised mean score for each facet (Sort by Wealth)'}>
In [28]:
# Line Plot of Absolute Total Facet score vs sorted wealth
df4.plot(use_index = 'True' , kind = 'line', y = ['Total Score_19'], title = 'Total responses line plot sorted by Wealth',xlabel = 'Unique responses sorted by wealth', ylabel = 'Total Score_19')
Out[28]:
<Axes: title={'center': 'Total responses line plot sorted by Wealth'}, xlabel='Unique responses sorted by wealth', ylabel='Total Score_19'>
In [29]:
# Absolute Facet scores line plot from low to high wealth
# I do understand that this is non readable
df4[['Certainty score_5','Uncertainty score_2','Number Comprehension score_5','Graph Comprehension score_3','Calculation score_4']].plot( kind = 'line', title = 'Absolute Facet scores line plot', use_index = True).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[29]:
<matplotlib.legend.Legend at 0x2351d65f950>
In [30]:
# Trend line for Absolute Total Facet Score vs unique Q14. Wealth reponses
sns.regplot (data = df4, x = df4.index, y = 'Total Score_19')
Out[30]:
<Axes: ylabel='Total Score_19'>
In [31]:
# Trend line for Absolute Independent Facet Score vs Q14. Wealth reponses
fig1, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4, x = df4.index, y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax1, label='Certainty score_5')
sns.regplot (data = df4, x = df4.index, y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax1, label='Uncertainty score_2')
sns.regplot (data = df4, x = df4.index, y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax1, label='Number Comprehension score_5')
sns.regplot (data = df4, x = df4.index, y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax1, label='Graph Comprehension score_3')
sns.regplot (data = df4, x = df4.index, y = 'Calculation score_4',fit_reg=True, ci=None, ax=ax1, label='Calculation score_4' )
ax1.set(ylabel='Scores', xlabel='Q14. Wealth')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [32]:
# Count of number of responses under each quartile
df4.groupby('Quartile Number')[['Quartile Number']].count()
Out[32]:
| Quartile Number | |
|---|---|
| Quartile Number | |
| 1 | 122 |
| 2 | 74 |
| 3 | 89 |
| 4 | 95 |
In [33]:
df4.loc[df4['Quartile Number']==1]
Out[33]:
| responseid | Q12. Income | Q14. Wealth | isced | age | age_rec | Certainty score_5 | Uncertainty score_2 | Number Comprehension score_5 | Graph Comprehension score_3 | Calculation score_4 | Total Score_19 | Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | Total Score_% | Quartile Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2305 | 4 | 0.0 | 1 | 29 | 2 | 1 | 0 | 3 | 2 | 2 | 8 | 20 | 0 | 60 | 66.666667 | 50 | 42.105263 | 1 |
| 1 | 5760 | 3 | 0.0 | 2 | 57 | 3 | 1 | 0 | 3 | 3 | 1 | 8 | 20 | 0 | 60 | 100.000000 | 25 | 42.105263 | 1 |
| 2 | 5463 | 6 | 0.0 | 1 | 30 | 2 | 2 | 1 | 4 | 3 | 2 | 12 | 40 | 50 | 80 | 100.000000 | 50 | 63.157895 | 1 |
| 3 | 796 | 2 | 0.0 | 1 | 35 | 2 | 5 | 2 | 4 | 2 | 3 | 16 | 100 | 100 | 80 | 66.666667 | 75 | 84.210526 | 1 |
| 4 | 2578 | 3 | 0.0 | 3 | 33 | 2 | 1 | 0 | 2 | 2 | 0 | 5 | 20 | 0 | 40 | 66.666667 | 0 | 26.315789 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 117 | 8449 | 2 | 5000.0 | 3 | 38 | 3 | 5 | 0 | 2 | 3 | 2 | 12 | 100 | 0 | 40 | 100.000000 | 50 | 63.157895 | 1 |
| 118 | 8979 | 2 | 5000.0 | 3 | 26 | 2 | 4 | 1 | 3 | 1 | 1 | 10 | 80 | 50 | 60 | 33.333333 | 25 | 52.631579 | 1 |
| 119 | 10476 | 4 | 5000.0 | 1 | 23 | 2 | 0 | 0 | 3 | 2 | 2 | 7 | 0 | 0 | 60 | 66.666667 | 50 | 36.842105 | 1 |
| 120 | 9214 | 3 | 5000.0 | 2 | 35 | 2 | 3 | 0 | 4 | 0 | 3 | 10 | 60 | 0 | 80 | 0.000000 | 75 | 52.631579 | 1 |
| 121 | 1952 | 1 | 5000.0 | 3 | 61 | 4 | 1 | 2 | 3 | 2 | 2 | 10 | 20 | 100 | 60 | 66.666667 | 50 | 52.631579 | 1 |
122 rows × 19 columns
In [34]:
# This graph is basically a line plot of the Total score_19 across first quartile unique responses which are ordered by income
df4.loc[df4['Quartile Number']==1].plot(use_index = 'True' , kind = 'line', y = ['Total Score_19'], title = 'Total Score line plot sorted by Wealth Q1',xlabel = 'Unique responses', ylabel = 'Total Score_19')
Out[34]:
<Axes: title={'center': 'Total Score line plot sorted by Wealth Q1'}, xlabel='Unique responses', ylabel='Total Score_19'>
In [35]:
# Trend line for Absolute Total Facet Score vs unique Q14. Income reponses in Quartile 1
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'Total Score_19')
Out[35]:
<Axes: ylabel='Total Score_19'>
In [36]:
# Trend line for Absolute Independent Facet Score vs Q14. Wealth reponses in Quartile 1
fig1, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax2, label='Certainty score_5')
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax2, label='Uncertainty score_2')
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax2, label='Number Comprehension score_5')
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax2, label='Graph Comprehension score_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'Calculation score_4',fit_reg=True, ci=None, ax=ax2, label='Calculation score_4' )
ax2.set(ylabel='Scores', xlabel='Q14. Wealth')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [37]:
df4.loc[df4['Quartile Number']==2]
Out[37]:
| responseid | Q12. Income | Q14. Wealth | isced | age | age_rec | Certainty score_5 | Uncertainty score_2 | Number Comprehension score_5 | Graph Comprehension score_3 | Calculation score_4 | Total Score_19 | Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | Total Score_% | Quartile Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 122 | 2053 | 2 | 6000.0 | 3 | 23 | 2 | 5 | 0 | 0 | 2 | 2 | 9 | 100 | 0 | 0 | 66.666667 | 50 | 47.368421 | 2 |
| 123 | 1326 | 1 | 6200.0 | 1 | 49 | 3 | 4 | 1 | 3 | 2 | 1 | 11 | 80 | 50 | 60 | 66.666667 | 25 | 57.894737 | 2 |
| 124 | 10822 | 5 | 6666.0 | 2 | 28 | 2 | 3 | 0 | 3 | 0 | 0 | 6 | 60 | 0 | 60 | 0.000000 | 0 | 31.578947 | 2 |
| 125 | 6128 | 1 | 8000.0 | 1 | 25 | 2 | 5 | 2 | 4 | 2 | 3 | 16 | 100 | 100 | 80 | 66.666667 | 75 | 84.210526 | 2 |
| 126 | 13429 | 2 | 8000.0 | 2 | 24 | 2 | 5 | 0 | 3 | 2 | 2 | 12 | 100 | 0 | 60 | 66.666667 | 50 | 63.157895 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 191 | 11387 | 6 | 30000.0 | 2 | 37 | 3 | 3 | 0 | 4 | 2 | 3 | 12 | 60 | 0 | 80 | 66.666667 | 75 | 63.157895 | 2 |
| 192 | 13397 | 3 | 30000.0 | 1 | 28 | 2 | 1 | 1 | 4 | 2 | 2 | 10 | 20 | 50 | 80 | 66.666667 | 50 | 52.631579 | 2 |
| 193 | 6789 | 4 | 30000.0 | 1 | 52 | 3 | 3 | 2 | 4 | 2 | 3 | 14 | 60 | 100 | 80 | 66.666667 | 75 | 73.684211 | 2 |
| 194 | 391 | 2 | 30000.0 | 3 | 52 | 3 | 2 | 1 | 0 | 1 | 3 | 7 | 40 | 50 | 0 | 33.333333 | 75 | 36.842105 | 2 |
| 195 | 10277 | 6 | 30000.0 | 1 | 31 | 2 | 3 | 2 | 4 | 3 | 2 | 14 | 60 | 100 | 80 | 100.000000 | 50 | 73.684211 | 2 |
74 rows × 19 columns
In [38]:
# This graph is basically a line plot of the Total score_19 across second quartile unique responses which are ordered by income
df4.loc[df4['Quartile Number']==2].plot(use_index = 'True' , kind = 'line', y = ['Total Score_19'], title = 'Total Score line plot sorted by Wealth Q1',xlabel = 'Unique responses', ylabel = 'Total Score_19')
Out[38]:
<Axes: title={'center': 'Total Score line plot sorted by Wealth Q1'}, xlabel='Unique responses', ylabel='Total Score_19'>
In [39]:
# Trend line for Absolute Total Facet Score vs unique Q14. Wealth reponses in Quartile 2
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'Total Score_19')
Out[39]:
<Axes: ylabel='Total Score_19'>
In [40]:
# Trend line for Absolute Independent Facet Score vs Q14. Wealth reponses in Quartile 2
fig1, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax3, label='Certainty score_5')
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax3, label='Uncertainty score_2')
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax3, label='Number Comprehension score_5')
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax3, label='Graph Comprehension score_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'Calculation score_4',fit_reg=True, ci=None, ax=ax3, label='Calculation score_4' )
ax3.set(ylabel='Scores', xlabel='Q14. Wealth')
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [41]:
df4.loc[df4['Quartile Number']==3]
Out[41]:
| responseid | Q12. Income | Q14. Wealth | isced | age | age_rec | Certainty score_5 | Uncertainty score_2 | Number Comprehension score_5 | Graph Comprehension score_3 | Calculation score_4 | Total Score_19 | Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | Total Score_% | Quartile Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 196 | 5909 | 3 | 32500.0 | 2 | 30 | 2 | 5 | 0 | 1 | 1 | 3 | 10 | 100 | 0 | 20 | 33.333333 | 75 | 52.631579 | 3 |
| 197 | 10855 | 4 | 34000.0 | 2 | 55 | 3 | 2 | 0 | 2 | 1 | 2 | 7 | 40 | 0 | 40 | 33.333333 | 50 | 36.842105 | 3 |
| 198 | 14112 | 3 | 34865.0 | 2 | 32 | 2 | 5 | 0 | 3 | 2 | 3 | 13 | 100 | 0 | 60 | 66.666667 | 75 | 68.421053 | 3 |
| 199 | 13314 | 5 | 35000.0 | 2 | 30 | 2 | 3 | 0 | 1 | 3 | 0 | 7 | 60 | 0 | 20 | 100.000000 | 0 | 36.842105 | 3 |
| 200 | 10801 | 3 | 35000.0 | 2 | 52 | 3 | 1 | 2 | 3 | 3 | 2 | 11 | 20 | 100 | 60 | 100.000000 | 50 | 57.894737 | 3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 280 | 9549 | 4 | 160000.0 | 2 | 42 | 3 | 5 | 0 | 4 | 3 | 3 | 15 | 100 | 0 | 80 | 100.000000 | 75 | 78.947368 | 3 |
| 281 | 9194 | 3 | 160000.0 | 2 | 43 | 3 | 3 | 1 | 2 | 2 | 1 | 9 | 60 | 50 | 40 | 66.666667 | 25 | 47.368421 | 3 |
| 282 | 1013 | 5 | 175000.0 | 3 | 73 | 4 | 4 | 1 | 4 | 2 | 2 | 13 | 80 | 50 | 80 | 66.666667 | 50 | 68.421053 | 3 |
| 283 | 8166 | 6 | 175000.0 | 2 | 60 | 4 | 3 | 1 | 4 | 3 | 3 | 14 | 60 | 50 | 80 | 100.000000 | 75 | 73.684211 | 3 |
| 284 | 5837 | 4 | 180000.0 | 2 | 54 | 3 | 5 | 1 | 4 | 2 | 3 | 15 | 100 | 50 | 80 | 66.666667 | 75 | 78.947368 | 3 |
89 rows × 19 columns
In [42]:
# This graph is basically a line plot of the Total score_19 across third quartile unique responses which are ordered by wealth
df4.loc[df4['Quartile Number']==3].plot(use_index = 'True' , kind = 'line', y = ['Total Score_19'], title = 'Total Score line plot sorted by Wealth Q3',xlabel = 'Unique responses', ylabel = 'Total Score_19')
Out[42]:
<Axes: title={'center': 'Total Score line plot sorted by Wealth Q3'}, xlabel='Unique responses', ylabel='Total Score_19'>
In [43]:
# Trend line for Absolute Total Facet Score vs unique Q14. Wealth reponses in Quartile 3
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'Total Score_19')
Out[43]:
<Axes: ylabel='Total Score_19'>
In [44]:
# Trend line for Absolute Independent Facet Score vs Q14. Wealth reponses in Quartile 3
fig1, ax4 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax4, label='Certainty score_5')
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax4, label='Uncertainty score_2')
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax4, label='Number Comprehension score_5')
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax4, label='Graph Comprehension score_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'Calculation score_4',fit_reg=True, ci=None, ax=ax4, label='Calculation score_4' )
ax4.set(ylabel='Scores', xlabel='Q14. Wealth')
ax4.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [45]:
df4.loc[df4['Quartile Number']==4]
Out[45]:
| responseid | Q12. Income | Q14. Wealth | isced | age | age_rec | Certainty score_5 | Uncertainty score_2 | Number Comprehension score_5 | Graph Comprehension score_3 | Calculation score_4 | Total Score_19 | Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | Total Score_% | Quartile Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 285 | 11536 | 4 | 185000.0 | 2 | 51 | 3 | 2 | 1 | 2 | 0 | 1 | 6 | 40 | 50 | 40 | 0.000000 | 25 | 31.578947 | 4 |
| 286 | 1315 | 2 | 200000.0 | 2 | 57 | 3 | 1 | 2 | 4 | 2 | 3 | 12 | 20 | 100 | 80 | 66.666667 | 75 | 63.157895 | 4 |
| 287 | 490 | 1 | 200000.0 | 3 | 65 | 4 | 1 | 1 | 4 | 3 | 1 | 10 | 20 | 50 | 80 | 100.000000 | 25 | 52.631579 | 4 |
| 288 | 13403 | 5 | 200000.0 | 2 | 32 | 2 | 3 | 1 | 3 | 2 | 2 | 11 | 60 | 50 | 60 | 66.666667 | 50 | 57.894737 | 4 |
| 289 | 13350 | 6 | 200000.0 | 1 | 30 | 2 | 2 | 2 | 3 | 2 | 2 | 11 | 40 | 100 | 60 | 66.666667 | 50 | 57.894737 | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 375 | 15 | 6 | 1250000.0 | 2 | 68 | 4 | 2 | 1 | 4 | 3 | 3 | 13 | 40 | 50 | 80 | 100.000000 | 75 | 68.421053 | 4 |
| 376 | 970 | 6 | 1350000.0 | 3 | 64 | 4 | 2 | 1 | 4 | 3 | 4 | 14 | 40 | 50 | 80 | 100.000000 | 100 | 73.684211 | 4 |
| 377 | 5592 | 7 | 1500000.0 | 2 | 64 | 4 | 3 | 1 | 5 | 3 | 3 | 15 | 60 | 50 | 100 | 100.000000 | 75 | 78.947368 | 4 |
| 378 | 10809 | 6 | 1650000.0 | 1 | 59 | 3 | 3 | 2 | 5 | 3 | 2 | 15 | 60 | 100 | 100 | 100.000000 | 50 | 78.947368 | 4 |
| 379 | 278 | 6 | 3500000.0 | 1 | 66 | 4 | 5 | 2 | 3 | 3 | 4 | 17 | 100 | 100 | 60 | 100.000000 | 100 | 89.473684 | 4 |
95 rows × 19 columns
In [46]:
# This graph is basically a line plot of the Total score_19 across fourth quartile unique responses which are ordered by Wealth
df4.loc[df4['Quartile Number']==4].plot(use_index = 'True' , kind = 'line', y = ['Total Score_19'], title = 'Total Score line plot sorted by Wealth Q4',xlabel = 'Unique responses', ylabel = 'Total Score_19')
Out[46]:
<Axes: title={'center': 'Total Score line plot sorted by Wealth Q4'}, xlabel='Unique responses', ylabel='Total Score_19'>
In [47]:
# Trend line for Absolute Total Facet Score vs unique Q14. Wealth reponses in Quartile 4
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'Total Score_19')
Out[47]:
<Axes: ylabel='Total Score_19'>
In [48]:
# Trend line for Absolute Independent Facet Score vs Q14. Wealth reponses in Quartile 4
fig1, ax5 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax5, label='Certainty score_5')
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax5, label='Uncertainty score_2')
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax5, label='Number Comprehension score_5')
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax5, label='Graph Comprehension score_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'Calculation score_4',fit_reg=True, ci=None, ax=ax5, label='Calculation score_4' )
ax5.set(ylabel='Scores', xlabel='Q14. Wealth')
ax5.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [ ]:
In [49]:
# Absolute Total Facet scores mean line plot from low to high Wealth
df4.groupby('Quartile Number')[['Total Score_19']].mean().plot( kind = 'line', title = 'Absolute Total Facet scores mean line plot low to high wealth', xticks = np.arange(1,5,step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[49]:
<matplotlib.legend.Legend at 0x2351f5c6c90>
In [50]:
# Absolute Facet scores mean line plot from low to high wealth
df4.groupby('Quartile Number')[['Certainty score_5','Uncertainty score_2','Number Comprehension score_5','Graph Comprehension score_3','Calculation score_4']].mean().plot( kind = 'line', title = 'Absolute Facet scores mean line plot low to high wealth', xticks = np.arange(1,5,step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[50]:
<matplotlib.legend.Legend at 0x2351f32f550>
In [51]:
# Normalised Facet scores mean line plot from low to high wealth
df4.groupby('Quartile Number')[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%','Total Score_%']].mean().plot( kind = 'line', title = 'Normalised Facet scores mean line plot from low to high wealth', xticks = np.arange(1,5,step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[51]:
<matplotlib.legend.Legend at 0x2351f63ac90>
In [52]:
# Normalised mean scores for each facet stacked
df4.groupby('Quartile Number')[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%']].mean().plot( kind = 'bar', title = 'Normalised mean scores for each facet stacked', stacked = True).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[52]:
<matplotlib.legend.Legend at 0x2351fc8b290>
In [53]:
# Normalised mean scores for each facet for each wealth response Quartile
df4.groupby('Quartile Number')[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%','Total Score_%']].mean().T.plot(kind = 'bar', title = 'Normalised mean scores for each facet for each wealth response category').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[53]:
<matplotlib.legend.Legend at 0x23520358850>
In [54]:
# Trend line for Absolute Total Facet Score vs Quartiles
sns.regplot (data = df4, x = 'Quartile Number', y = 'Total Score_19')
Out[54]:
<Axes: xlabel='Quartile Number', ylabel='Total Score_19'>
In [55]:
# Trend line for Absolute Independent Facet Score vs Quartiles
fig2, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4, x = 'Quartile Number', y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax2, label='Certainty score_5')
sns.regplot (data = df4, x = 'Quartile Number', y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax2, label='Uncertainty score_2')
sns.regplot (data = df4, x = 'Quartile Number', y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax2, label='Number Comprehension score_5')
sns.regplot (data = df4, x = 'Quartile Number', y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax2, label='Graph Comprehension score_3')
sns.regplot (data = df4, x = 'Quartile Number', y = 'Calculation score_4',fit_reg=True, ci=None, ax=ax2, label='Calculation score_4' )
ax2.set(ylabel='Scores', xlabel='Q14. Wealth')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [56]:
# Violine Plot for Total Score_19 for each entry under Wealth Quartiles.
sns.violinplot( data = df4, x = 'Quartile Number', y = 'Total Score_19')
Out[56]:
<Axes: xlabel='Quartile Number', ylabel='Total Score_19'>
In [57]:
# Violine Plot for Calculation score_4 for each entry under Wealth Quartiles.
sns.violinplot( data = df4, x = 'Quartile Number', y = 'Calculation score_4')
Out[57]:
<Axes: xlabel='Quartile Number', ylabel='Calculation score_4'>
In [58]:
# Violine Plot for Graph Comprehension score_3 for each entry under Wealth Quartiles.
sns.violinplot( data = df4, x = 'Quartile Number', y = 'Graph Comprehension score_3')
Out[58]:
<Axes: xlabel='Quartile Number', ylabel='Graph Comprehension score_3'>
In [59]:
# Violine Plot for Number Comprehension score_5 for each entry under Wealth Quartiles.
sns.violinplot( data = df4, x = 'Quartile Number', y = 'Number Comprehension score_5')
Out[59]:
<Axes: xlabel='Quartile Number', ylabel='Number Comprehension score_5'>
In [60]:
# Violine Plot for Certainty score_5 for each entry under Wealth Quartiles.
sns.violinplot( data = df4, x = 'Quartile Number', y = 'Certainty score_5')
Out[60]:
<Axes: xlabel='Quartile Number', ylabel='Certainty score_5'>
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [61]:
# Data Frame 5 or df5 is an aggregate data on facet total scores and over all total score, along with education data vs each response.
# This data set is now sorted by education.
df5 = df2.sort_values(by = 'isced')
df5 = df5.reset_index(drop = True)
df5
Out[61]:
| responseid | Q12. Income | Q14. Wealth | isced | age | age_rec | Certainty score_5 | Uncertainty score_2 | Number Comprehension score_5 | Graph Comprehension score_3 | Calculation score_4 | Total Score_19 | Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | Total Score_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 6873 | 6 | NaN | 1 | 25 | 2 | 1 | 1 | 2 | 1 | 1 | 6 | 20 | 50 | 40 | 33.333333 | 25 | 31.578947 |
| 1 | 7875 | 5 | NaN | 1 | 63 | 4 | 1 | 1 | 3 | 2 | 2 | 9 | 20 | 50 | 60 | 66.666667 | 50 | 47.368421 |
| 2 | 7876 | 6 | 25000.0 | 1 | 41 | 3 | 3 | 2 | 4 | 3 | 3 | 15 | 60 | 100 | 80 | 100.000000 | 75 | 78.947368 |
| 3 | 7877 | 7 | NaN | 1 | 55 | 3 | 5 | 2 | 4 | 3 | 3 | 17 | 100 | 100 | 80 | 100.000000 | 75 | 89.473684 |
| 4 | 7884 | 6 | 950000.0 | 1 | 52 | 3 | 5 | 2 | 5 | 2 | 4 | 18 | 100 | 100 | 100 | 66.666667 | 100 | 94.736842 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 647 | 78 | 1 | NaN | 3 | 51 | 3 | 2 | 2 | 5 | 3 | 2 | 14 | 40 | 100 | 100 | 100.000000 | 50 | 73.684211 |
| 648 | 6130 | 1 | NaN | 3 | 32 | 2 | 2 | 0 | 2 | 1 | 0 | 5 | 40 | 0 | 40 | 33.333333 | 0 | 26.315789 |
| 649 | 6138 | 7 | NaN | 3 | 34 | 2 | 2 | 1 | 3 | 0 | 0 | 6 | 40 | 50 | 60 | 0.000000 | 0 | 31.578947 |
| 650 | 467 | 7 | NaN | 3 | 55 | 3 | 4 | 0 | 3 | 2 | 3 | 12 | 80 | 0 | 60 | 66.666667 | 75 | 63.157895 |
| 651 | 405 | 2 | NaN | 3 | 48 | 3 | 5 | 0 | 4 | 1 | 2 | 12 | 100 | 0 | 80 | 33.333333 | 50 | 63.157895 |
652 rows × 18 columns
In [62]:
# since we didnt drop any values, the dataset is essentially the same as df2, but sorted
In [63]:
# Mean of each facet as a % for each category of isced
df5.groupby('isced')[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%']].mean()
Out[63]:
| Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | |
|---|---|---|---|---|---|
| isced | |||||
| 1 | 60.000000 | 47.727273 | 66.136364 | 66.035354 | 57.670455 |
| 2 | 55.462185 | 36.134454 | 60.000000 | 60.224090 | 46.008403 |
| 3 | 53.466667 | 41.333333 | 57.466667 | 58.222222 | 46.166667 |
In [64]:
# Count of responses for each category isced
df5.groupby('isced')[['responseid']].count()
Out[64]:
| responseid | |
|---|---|
| isced | |
| 1 | 264 |
| 2 | 238 |
| 3 | 150 |
In [65]:
# Absolute Total Facet scores mean line plot from low to high education
df5.groupby('isced')[['Total Score_19']].mean().plot( kind = 'line', title = 'Absolute Total Facet scores mean line plot', xticks = np.arange(1,4, step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[65]:
<matplotlib.legend.Legend at 0x2351f17b290>
In [66]:
# Absolute Facet scores mean line plot from low to high edu
df5.groupby('isced')[['Certainty score_5','Uncertainty score_2','Number Comprehension score_5','Graph Comprehension score_3','Calculation score_4']].mean().plot( kind = 'line', title = 'Absolute Facet scores mean line plot', xticks = np.arange(1,4, step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[66]:
<matplotlib.legend.Legend at 0x2352009aad0>
In [67]:
# Normalised Facet scores mean line plot from low to high edu
df5.groupby('isced')[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%','Total Score_%']].mean().plot( kind = 'line', title = 'Normalised Facet scores mean line plot', xticks = np.arange(1,4, step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[67]:
<matplotlib.legend.Legend at 0x23520122410>
In [68]:
# Normalised mean scores for each facet stacked
df5.groupby('isced')[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%']].mean().plot( kind = 'bar', title = 'Normalised mean scores for each facet stacked', stacked = True).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[68]:
<matplotlib.legend.Legend at 0x23521ba8d90>
In [69]:
# Normalised mean scores for each facet for each edu response category
df5.groupby('isced')[['Certainty score_%','Uncertainty score_%','Number Comprehension score_%','Graph Comprehension score_%','Calculation score_%','Total Score_%']].mean().T.plot(kind = 'bar', title = 'Normalised mean scores for each facet for each edu response category').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[69]:
<matplotlib.legend.Legend at 0x235217e3150>
In [70]:
# Trend line for Absolute Total Facet Score vs edu (isced) reponses
sns.regplot (data = df5, x = 'isced', y = 'Total Score_19')
Out[70]:
<Axes: xlabel='isced', ylabel='Total Score_19'>
In [71]:
# Trend line for Absolute Independent Facet Score vs edu (isced) reponses
fig, ax6 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df5, x = 'isced', y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax6, label='Certainty score_5')
sns.regplot (data = df5, x = 'isced', y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax6, label='Uncertainty score_2')
sns.regplot (data = df5, x = 'isced', y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax6, label='Number Comprehension score_5')
sns.regplot (data = df5, x = 'isced', y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax6, label='Graph Comprehension score_3')
sns.regplot (data = df5, x = 'isced', y = 'Calculation score_4',fit_reg=True, ci=None, ax=ax6, label='Calculation score_4' )
ax6.set(ylabel='Scores', xlabel='isced')
ax6.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [72]:
# Violine Plot for Total Score_19 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'Total Score_19')
Out[72]:
<Axes: xlabel='isced', ylabel='Total Score_19'>
In [73]:
# Violine Plot for Total Score_19 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'Calculation score_4')
Out[73]:
<Axes: xlabel='isced', ylabel='Calculation score_4'>
In [74]:
# Violine Plot for Graph Comprehension score_3 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'Graph Comprehension score_3')
Out[74]:
<Axes: xlabel='isced', ylabel='Graph Comprehension score_3'>
In [75]:
# Violine Plot for Number Comprehension score_5 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'Number Comprehension score_5')
Out[75]:
<Axes: xlabel='isced', ylabel='Number Comprehension score_5'>
In [76]:
# Violine Plot for Uncertainty score_2 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'Uncertainty score_2')
Out[76]:
<Axes: xlabel='isced', ylabel='Uncertainty score_2'>
In [77]:
# Violine Plot for Certainty score_5 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'Certainty score_5')
Out[77]:
<Axes: xlabel='isced', ylabel='Certainty score_5'>
In [78]:
# Descriptive stats for the data set, isced = 1
# NA values of wealth are removed
df6 = df5.dropna(axis = 0, subset = 'Q14. Wealth')
df6.drop(df6[df6['Q14. Wealth'] == 3500000].index, inplace = True)
df6.loc[df6['isced']==1][['Q14. Wealth']].describe()
C:\Users\SHIVAM\AppData\Local\Temp\ipykernel_11988\3962586432.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df6.drop(df6[df6['Q14. Wealth'] == 3500000].index, inplace = True)
Out[78]:
| Q14. Wealth | |
|---|---|
| count | 1.690000e+02 |
| mean | 1.708374e+05 |
| std | 2.564941e+05 |
| min | 0.000000e+00 |
| 25% | 5.000000e+03 |
| 50% | 5.500000e+04 |
| 75% | 2.500000e+05 |
| max | 1.650000e+06 |
In [79]:
# Descriptive stats for the data set, isced = 2
df6.loc[df6['isced']==2][['Q14. Wealth']].describe()
Out[79]:
| Q14. Wealth | |
|---|---|
| count | 1.360000e+02 |
| mean | 1.387606e+05 |
| std | 2.632906e+05 |
| min | 0.000000e+00 |
| 25% | 5.000000e+03 |
| 50% | 3.000000e+04 |
| 75% | 1.600000e+05 |
| max | 1.500000e+06 |
In [80]:
# Descriptive stats for the data set, isced = 3
df6.loc[df6['isced']==3][['Q14. Wealth']].describe()
Out[80]:
| Q14. Wealth | |
|---|---|
| count | 7.400000e+01 |
| mean | 9.383407e+04 |
| std | 2.127450e+05 |
| min | 0.000000e+00 |
| 25% | 1.000000e+03 |
| 50% | 1.000000e+04 |
| 75% | 4.875000e+04 |
| max | 1.350000e+06 |
In [81]:
# Trend line for Absolute Tota Facet Score vs wealth reponses sorted by isced and ORDERED by wealth WITH scatter
fig, ax7 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['Q14. Wealth'], y = 'Total Score_19', fit_reg=True, ci=None, ax=ax7, label='ISCED = 1')
sns.regplot (data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['Q14. Wealth'], y = 'Total Score_19', fit_reg=True, ci=None, ax=ax7, label='ISCED = 2')
sns.regplot (data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['Q14. Wealth'], y = 'Total Score_19', fit_reg=True, ci=None, ax=ax7, label='ISCED = 3')
ax7.set(ylabel='Total Scores_19', xlabel='Wealth')
ax7.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [82]:
# Trend line for Absolute Tota Facet Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax8 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['Q14. Wealth'], y = 'Total Score_19', fit_reg=True, ci=None, ax=ax8, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['Q14. Wealth'], y = 'Total Score_19', fit_reg=True, ci=None, ax=ax8, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['Q14. Wealth'], y = 'Total Score_19', fit_reg=True, ci=None, ax=ax8, label='ISCED = 3')
ax8.set(ylabel='Total Scores_19', xlabel='Wealth')
ax8.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [83]:
# Trend line for Absolute Certainty Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax9 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['Q14. Wealth'], y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax9, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['Q14. Wealth'], y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax9, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['Q14. Wealth'], y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax9, label='ISCED = 3')
ax9.set(ylabel='Certainty score_5', xlabel='Wealth')
ax9.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [84]:
# Trend line for Absolute Uncertainty Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax10 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['Q14. Wealth'], y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax10, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['Q14. Wealth'], y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax10, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['Q14. Wealth'], y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax10, label='ISCED = 3')
ax10.set(ylabel='Uncertainty score_2', xlabel='Wealth')
ax10.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [85]:
# Trend line for Absolute Number Comprehension Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax11 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['Q14. Wealth'], y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax11, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['Q14. Wealth'], y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax11, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['Q14. Wealth'], y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax11, label='ISCED = 3')
ax11.set(ylabel='Number Comprehension score_5', xlabel='Wealth')
ax11.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [86]:
# Trend line for Absolute Graph Comprehension Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax11 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['Q14. Wealth'], y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['Q14. Wealth'], y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['Q14. Wealth'], y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 3')
ax11.set(ylabel='Graph Comprehension score_3', xlabel='Wealth')
ax11.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [87]:
# Trend line for Absolute Calculation Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax12 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['Q14. Wealth'], y = 'Calculation score_4', fit_reg=True, ci=None, ax=ax12, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['Q14. Wealth'], y = 'Calculation score_4', fit_reg=True, ci=None, ax=ax12, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['Q14. Wealth'], y = 'Calculation score_4', fit_reg=True, ci=None, ax=ax12, label='ISCED = 3')
ax12.set(ylabel='Calculation score_4', xlabel='Wealth')
ax12.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [88]:
# Descriptive stats for the data set, isced = 1
# NA values of income are removed
df7 = df5
df7.drop(df7[df7['Q12. Income'] == 7].index, inplace = True)
df7.loc[df7['isced']==1][['Q12. Income']].describe()
Out[88]:
| Q12. Income | |
|---|---|
| count | 240.000000 |
| mean | 3.704167 |
| std | 1.628712 |
| min | 1.000000 |
| 25% | 2.000000 |
| 50% | 4.000000 |
| 75% | 5.000000 |
| max | 6.000000 |
In [89]:
df7.loc[df7['isced']==2][['Q12. Income']].describe()
Out[89]:
| Q12. Income | |
|---|---|
| count | 211.000000 |
| mean | 3.175355 |
| std | 1.398148 |
| min | 1.000000 |
| 25% | 2.000000 |
| 50% | 3.000000 |
| 75% | 4.000000 |
| max | 6.000000 |
In [90]:
df7.loc[df7['isced']==3][['Q12. Income']].describe()
Out[90]:
| Q12. Income | |
|---|---|
| count | 126.000000 |
| mean | 2.619048 |
| std | 1.563878 |
| min | 1.000000 |
| 25% | 1.000000 |
| 50% | 2.000000 |
| 75% | 3.000000 |
| max | 6.000000 |
In [91]:
# Trend line for Absolute Tota Facet Score vs income reponses sorted by isced and ORDERED by income WITH scatter
fig, ax7 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[df7['isced']==1]['Total Score_19'], fit_reg=True, ci=None, ax=ax7, label='ISCED = 1')
sns.regplot (data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[df7['isced']==2]['Total Score_19'], fit_reg=True, ci=None, ax=ax7, label='ISCED = 2')
sns.regplot (data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[df7['isced']==3]['Total Score_19'], fit_reg=True, ci=None, ax=ax7, label='ISCED = 3')
ax7.set(ylabel='Total Scores_19', xlabel='INCOME')
ax7.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [92]:
# Trend line for Absolute Tota Facet Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax8 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'Q12. Income', y = 'Total Score_19', fit_reg=True, ci=None, ax=ax8, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'Q12. Income', y = 'Total Score_19', fit_reg=True, ci=None, ax=ax8, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'Q12. Income', y = 'Total Score_19', fit_reg=True, ci=None, ax=ax8, label='ISCED = 3')
ax8.set(ylabel='Total Scores_19', xlabel='INCOME')
ax8.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [93]:
# Trend line for Absolute Certainty Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax9 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'Q12. Income', y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax9, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'Q12. Income', y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax9, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'Q12. Income', y = 'Certainty score_5', fit_reg=True, ci=None, ax=ax9, label='ISCED = 3')
ax9.set(ylabel='Certainty score_5', xlabel='INCOME')
ax9.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [94]:
# Trend line for Absolute Uncertainty Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax10 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'Q12. Income', y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax10, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'Q12. Income', y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax10, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'Q12. Income', y = 'Uncertainty score_2', fit_reg=True, ci=None, ax=ax10, label='ISCED = 3')
ax10.set(ylabel='Uncertainty score_2', xlabel='INCOME')
ax10.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [95]:
# Trend line for Absolute Number Comprehension Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax11 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'Q12. Income', y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax11, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'Q12. Income', y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax11, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'Q12. Income', y = 'Number Comprehension score_5', fit_reg=True, ci=None, ax=ax11, label='ISCED = 3')
ax11.set(ylabel='Number Comprehension score_5', xlabel='INCOME')
ax11.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [96]:
# Trend line for Absolute Graph Comprehension Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax11 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'Q12. Income', y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'Q12. Income', y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'Q12. Income', y = 'Graph Comprehension score_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 3')
ax11.set(ylabel='Graph Comprehension score_3', xlabel='INCOME')
ax11.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [97]:
# Trend line for Absolute Calculation Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax12 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'Q12. Income', y = 'Calculation score_4', fit_reg=True, ci=None, ax=ax12, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'Q12. Income', y = 'Calculation score_4', fit_reg=True, ci=None, ax=ax12, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'Q12. Income', y = 'Calculation score_4', fit_reg=True, ci=None, ax=ax12, label='ISCED = 3')
ax12.set(ylabel='Calculation score_4', xlabel='INCOME')
ax12.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [98]:
df7
Out[98]:
| responseid | Q12. Income | Q14. Wealth | isced | age | age_rec | Certainty score_5 | Uncertainty score_2 | Number Comprehension score_5 | Graph Comprehension score_3 | Calculation score_4 | Total Score_19 | Certainty score_% | Uncertainty score_% | Number Comprehension score_% | Graph Comprehension score_% | Calculation score_% | Total Score_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 6873 | 6 | NaN | 1 | 25 | 2 | 1 | 1 | 2 | 1 | 1 | 6 | 20 | 50 | 40 | 33.333333 | 25 | 31.578947 |
| 1 | 7875 | 5 | NaN | 1 | 63 | 4 | 1 | 1 | 3 | 2 | 2 | 9 | 20 | 50 | 60 | 66.666667 | 50 | 47.368421 |
| 2 | 7876 | 6 | 25000.0 | 1 | 41 | 3 | 3 | 2 | 4 | 3 | 3 | 15 | 60 | 100 | 80 | 100.000000 | 75 | 78.947368 |
| 4 | 7884 | 6 | 950000.0 | 1 | 52 | 3 | 5 | 2 | 5 | 2 | 4 | 18 | 100 | 100 | 100 | 66.666667 | 100 | 94.736842 |
| 5 | 8688 | 3 | 155000.0 | 1 | 50 | 3 | 4 | 0 | 4 | 1 | 1 | 10 | 80 | 0 | 80 | 33.333333 | 25 | 52.631579 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 645 | 115 | 2 | 100000.0 | 3 | 49 | 3 | 2 | 1 | 3 | 1 | 2 | 9 | 40 | 50 | 60 | 33.333333 | 50 | 47.368421 |
| 646 | 104 | 1 | 1000.0 | 3 | 67 | 4 | 1 | 0 | 3 | 0 | 1 | 5 | 20 | 0 | 60 | 0.000000 | 25 | 26.315789 |
| 647 | 78 | 1 | NaN | 3 | 51 | 3 | 2 | 2 | 5 | 3 | 2 | 14 | 40 | 100 | 100 | 100.000000 | 50 | 73.684211 |
| 648 | 6130 | 1 | NaN | 3 | 32 | 2 | 2 | 0 | 2 | 1 | 0 | 5 | 40 | 0 | 40 | 33.333333 | 0 | 26.315789 |
| 651 | 405 | 2 | NaN | 3 | 48 | 3 | 5 | 0 | 4 | 1 | 2 | 12 | 100 | 0 | 80 | 33.333333 | 50 | 63.157895 |
577 rows × 18 columns
In [99]:
# Since we already have a classification for Age groups in the form of age_rec, we will use that.
# We will also use median of age to see if it yields any relevant results, as instructed.
# AXES to be used = Age or age groups, ISCED, Income
# Age groups = [2,3,4]
# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 59 y/o
# Age group 4 = 60 to 75 y/o (75 y/o, i.e, within the scope of the data we have, it can mean 60 and above also)
In [100]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Total Scores_19', xlabel='INCOME 18 to 35')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Total Scores_19', xlabel='INCOME 36 to 59')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['Total Score_19'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['Total Score_19'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['Total Score_19'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Total Scores_19', xlabel='INCOME 60 to 75')
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [101]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Total Scores_19', xlabel='INCOME 18 to 35')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Total Scores_19', xlabel='INCOME 36 to 59')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['Total Score_19'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['Total Score_19'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['Total Score_19'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Total Scores_19', xlabel='INCOME 60 to 75')
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [102]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Certainty score_5', xlabel='INCOME 18 to 35')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Certainty score_5', xlabel='INCOME 36 to 59')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Certainty score_5', xlabel='INCOME 60 to 75')
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [103]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Uncertainty score_2', xlabel='INCOME 18 to 35')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Uncertainty score_2', xlabel='INCOME 36 to 59')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Uncertainty score_2', xlabel='INCOME 60 to 75')
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [104]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Number Comprehension score_5', xlabel='INCOME 18 to 35')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Number Comprehension score_5', xlabel='INCOME 36 to 59')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Number Comprehension score_5', xlabel='INCOME 60 to 75')
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [105]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Graph Comprehension score_3', xlabel='INCOME 18 to 35')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Graph Comprehension score_3', xlabel='INCOME 36 to 59')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Graph Comprehension score_3', xlabel='INCOME 60 to 75')
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [106]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Calculation score_4', xlabel='INCOME 18 to 35')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Calculation score_4', xlabel='INCOME 36 to 59')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Calculation score_4', xlabel='INCOME 60 to 75')
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [107]:
# Age median method
# Age_median
age_m = df7['age'].mean()
age_m
Out[107]:
45.98786828422877
In [108]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Total Scores_19', xlabel='INCOME for < median age')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Total Scores_19', xlabel='INCOME for > median age')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [109]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['Total Score_19'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Total Scores_19', xlabel='INCOME for < median age')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['Total Score_19'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Total Scores_19', xlabel='INCOME for > median age')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [110]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Certainty score_5', xlabel='INCOME for < median age')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['Certainty score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Certainty score_5', xlabel='INCOME for > median age')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [111]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Uncertainty score_2', xlabel='INCOME for < median age')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['Uncertainty score_2'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Uncertainty score_2', xlabel='INCOME for > median age')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [112]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Number Comprehension score_5', xlabel='INCOME for < median age')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['Number Comprehension score_5'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Number Comprehension score_5', xlabel='INCOME for > median age')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [113]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Graph Comprehension score_3', xlabel='INCOME for < median age')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['Graph Comprehension score_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Graph Comprehension score_3', xlabel='INCOME for > median age')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [114]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Calculation score_4', xlabel='INCOME for < median age')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'Q12. Income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['Calculation score_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Calculation score_4', xlabel='INCOME for > median age')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [115]:
# 3D graph representation
fig = plt.figure(figsize = (6,6))
ax = fig.add_subplot(111, projection='3d')
# defining axes
z = df7.loc[:,'age'] # Age on Z axis
x = df7.loc[:,'Q12. Income'] # Income on X axis
y = df7.loc[:,'isced'] # ISCED on Y axis
ax.scatter(x, y, z)
ax.set_title('3d Scatter plot')
ax.set(ylabel='ISCED', xlabel='Income', zlabel = 'Age')
plt.show()
In [116]:
fig = plt.figure(figsize = (6,6))
ax = fig.add_subplot(111, projection='3d')
z = df7.loc[:,'age'] # Age on Z axis
x = df7.loc[:,'Q12. Income'] # Income on X axis
y = df7.loc[:,'isced'] # ISCED on Y axis
c = df7.loc[:,'Total Score_19'] # on Heat scale
img = ax.scatter(x, y, z, c=c, cmap=plt.hot())
ax.set(ylabel='ISCED', xlabel='Income', zlabel = 'Age')
ax.set_title('3d Scatter plot')
fig.colorbar(img, anchor = (2.0,2.0), label = 'Total Score_19')
plt.show()
In [117]:
fig = plt.figure(figsize = (6,6))
ax = fig.add_subplot(111, projection='3d')
c = df7.loc[:,'age'] # Age on Heat scale
x = df7.loc[:,'Q12. Income'] # Income on X axis
z = df7.loc[:,'isced'] # ISCED on Z axis
y = df7.loc[:,'Total Score_19'] # on Y axis
img = ax.scatter(x, y, z, c=c, cmap=plt.hot())
ax.set(ylabel='Total Score_19', xlabel='Income', zlabel = 'ISCED')
ax.set_title('3d Scatter plot')
fig.colorbar(img, anchor = (2.0,2.0), label = 'Age')
plt.show()
In [118]:
# 3D plots are amounting to be nigh unreadable, and usually speaking, the methods that we have used above are far more easier to visualise.
# However, I have included them just in case you are able to interpret them in your expertise.
In [ ]:
In [ ]:
In [ ]: